import requests
from bs4 import BeautifulSoup
import os
import re

def getHtmlUrl(url, timeout=10):
    """Download the page at *url* and return its body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the scraper indefinitely.

    Returns:
        The decoded page text, or the sentinel string ``"error!!"`` when the
        request fails (connection problem, timeout, or HTTP error status).
    """
    print(url)
    try:
        rsp = requests.get(url, timeout=timeout)
        rsp.raise_for_status()
    except requests.RequestException:
        # Only network/HTTP failures are expected here; anything else
        # (e.g. KeyboardInterrupt, programming errors) should propagate.
        print("获取界面全部内容发生异常，请检测访问地址是否正常")
        return "error!!"
    # Use the charset detected from the body rather than the HTTP header,
    # which is often missing or wrong on the scraped pages.
    rsp.encoding = rsp.apparent_encoding
    return rsp.text



def getJPG(html, save_dir='d:/Img/', timeout=10):
    """Download every thumbnail listed in the ``div.list > ul`` of *html*.

    Each ``<img>`` is saved as ``<alt text>.jpg`` inside *save_dir*; images
    whose target file already exists are skipped.

    Args:
        html: HTML text of a listing page.
        save_dir: Directory the images are written to (created on demand).
        timeout: Seconds to wait for each image download.

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    soup = BeautifulSoup(html, 'html.parser')
    container = soup.find('div', class_='list')
    listing = container.find('ul') if container is not None else None
    if listing is None:
        # Happens when the page layout differs or when *html* is the
        # "error!!" sentinel produced by a failed fetch.
        print("未找到图片列表，页面结构可能已变化")
        return

    for img in listing.find_all("img"):
        imgName = img.get('alt')
        imgUrl = img.get('src')
        if not imgName or not imgUrl:
            # Without both a name and an address there is nothing to save.
            continue
        print("图片名称："+imgName+"图片地址："+imgUrl)
        # The alt text comes from the remote page: neutralise path separators
        # and characters that are invalid in Windows file names.
        safeName = re.sub(r'[\\/:*?"<>|]', '_', imgName)
        ImgPath = os.path.join(save_dir, safeName + ".jpg")
        try:
            os.makedirs(save_dir, exist_ok=True)
            if os.path.exists(ImgPath):
                print(imgName+"图片已存在！！")
                continue
            r = requests.get(imgUrl, timeout=timeout)
            # Do not store an HTTP error page under a .jpg name.
            r.raise_for_status()
            with open(ImgPath, 'wb') as f:
                f.write(r.content)
            print(imgName+"的图片下载成功！！")
        except (OSError, requests.RequestException) as e:
            # Best effort: report the failure and carry on with the next image.
            print("抓取图片失败，原因为："+str(e))



# Disabled helper: fetch the page an image links to and feed it into the HTML parser.
# def getImgUrl(img):
#     href = img['href']
#     htm = getHtmlUrl(href)
#     soup = BeautifulSoup(htm,'html.parser')
#     return soup

def getBigHtml(img, base_url="http://www.netbian.com"):
    """Fetch the detail page that the anchor *img* links to.

    Args:
        img: Parsed anchor element (or any mapping) exposing an ``'href'`` key
            that holds a site-relative link.
        base_url: Site root the relative link is resolved against.

    Returns:
        Whatever ``getHtmlUrl`` returns for the resolved address: the page
        text, or its error sentinel on failure.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import urljoin

    # urljoin inserts the "/" that plain concatenation would miss when the
    # href has no leading slash.
    bigUrl = urljoin(base_url, img['href'])
    return getHtmlUrl(bigUrl)



def getHtmlImgUrl(html):
    """Print the detail page of every wallpaper linked from a listing page.

    Looks inside ``div.list > ul`` for anchors that open in a new tab and
    whose ``href`` does not contain ``http`` (i.e. site-relative links),
    fetches each linked page via ``getBigHtml`` and prints its content.

    Args:
        html: HTML text of a listing page.

    Returns:
        None. All output goes to ``print``.
    """
    soup = BeautifulSoup(html, 'html.parser')
    container = soup.find('div', class_='list')
    listing = container.find('ul') if container is not None else None
    if listing is None:
        # Happens when the page layout differs or when *html* is the
        # "error!!" sentinel produced by a failed fetch.
        print("未找到图片列表，页面结构可能已变化")
        return

    # Matches only strings in which "http" never occurs.
    relative_href = re.compile('^((?!http).)*$')
    all_bigImg_htmlUrl = listing.find_all(
        'a', attrs={'href': relative_href, 'target': '_blank'})
    for bigImg in all_bigImg_htmlUrl:
        # Some anchors may lack a title; fall back to an empty label
        # instead of aborting the whole loop with a KeyError.
        bigImgName = bigImg.get('title', '')
        bigImgUrl = bigImg['href']
        print("图片大标题："+bigImgName+", 图片访问界面地址："+bigImgUrl)
        print("大图片界面全部内容如下：")
        bigText = getBigHtml(bigImg)
        print(bigText)